made by Ashraf Salih
In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [8]:
import plotly as pt
Data import
In [8]:
fuel = pd.read_csv('datasets/fuel.csv')
In [9]:
fuel
Out[9]:
| Horse Power | Fuel Economy (MPG) | |
|---|---|---|
| 0 | 118.770799 | 29.344195 |
| 1 | 176.326567 | 24.695934 |
| 2 | 219.262465 | 23.952010 |
| 3 | 187.310009 | 23.384546 |
| 4 | 218.594340 | 23.426739 |
| ... | ... | ... |
| 95 | 162.810542 | 27.418661 |
| 96 | 266.869640 | 15.989945 |
| 97 | 243.831211 | 19.253375 |
| 98 | 140.959803 | 29.515593 |
| 99 | 184.123053 | 25.196097 |
100 rows × 2 columns
In [10]:
fuel.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 100 entries, 0 to 99 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Horse Power 100 non-null float64 1 Fuel Economy (MPG) 100 non-null float64 dtypes: float64(2) memory usage: 1.7 KB
In [11]:
fuel.describe()
Out[11]:
| Horse Power | Fuel Economy (MPG) | |
|---|---|---|
| count | 100.000000 | 100.000000 |
| mean | 213.676190 | 23.178501 |
| std | 62.061726 | 4.701666 |
| min | 50.000000 | 10.000000 |
| 25% | 174.996514 | 20.439516 |
| 50% | 218.928402 | 23.143192 |
| 75% | 251.706476 | 26.089933 |
| max | 350.000000 | 35.000000 |
In [11]:
import plotly.express as px
Visualitation
In [12]:
px.scatter(fuel,x='Horse Power',y='Fuel Economy (MPG)')
In [12]:
sns.scatterplot(data = fuel, x = 'Horse Power', y = 'Fuel Economy (MPG)')
plt.show()
In [13]:
sns.regplot(x='Horse Power', y='Fuel Economy (MPG)', data=fuel)
plt.show()
In [13]:
px.histogram(fuel,x='Fuel Economy (MPG)')
In [14]:
px.histogram(fuel,x='Horse Power')
Splitting Data
In [16]:
x =fuel[['Horse Power']]
y=fuel[['Fuel Economy (MPG)']]
In [20]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.3)
Training
In [22]:
from sklearn.linear_model import LinearRegression
In [25]:
reg= LinearRegression()
In [26]:
reg.fit(X_train,y_train)
Out[26]:
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [29]:
reg.coef_
Out[29]:
array([[-0.07203114]])
In [30]:
reg.intercept_
Out[30]:
array([38.57636334])
In [32]:
reg.predict([[300]])
C:\Users\Ashra\anaconda3\Lib\site-packages\sklearn\base.py:493: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
Out[32]:
array([[16.96702159]])
In [33]:
y_pred=reg.predict(X_test)
y_pred
Out[33]:
array([[31.98981696],
[19.35343919],
[28.61699715],
[29.89774657],
[28.46086385],
[26.14064374],
[24.76692693],
[24.2315321 ],
[16.06719351],
[18.0526321 ],
[22.50047897],
[22.41674787],
[20.88842194],
[28.93693583],
[21.01969214],
[28.42286818],
[19.71485757],
[25.87535982],
[21.97587415],
[22.78263821],
[20.86963943],
[22.17700037],
[29.02002311],
[32.48454639],
[25.94578138],
[15.45787143],
[25.9003797 ],
[17.36852921],
[19.97011895],
[15.82766917]])
In [34]:
y_test.values
Out[34]:
array([[31.80670649],
[15.98994481],
[27.47341504],
[29.67863744],
[28.88208128],
[27.38701207],
[23.55672887],
[23.01119391],
[15.61895639],
[18.87834992],
[23.3071922 ],
[21.70120173],
[19.13999943],
[30.67480326],
[21.44270298],
[29.51559288],
[21.05039889],
[24.6959341 ],
[20.71572205],
[23.95201001],
[22.84971109],
[21.20598653],
[26.77814827],
[34.11402465],
[26.05708471],
[15.44171107],
[26.18847756],
[17.27421781],
[21.26177779],
[16.8311881 ]])
In [35]:
reg.score(x,y)
Out[35]:
0.9095292365456379
In [36]:
from sklearn.metrics import r2_score
print(r2_score(y_test, y_pred))
0.9340266063799337
In [49]:
plt.scatter(X_test, y_test)
plt.plot(X_test, y_pred, color = 'r')
plt.xlabel('Horse Power (HP)')
plt.ylabel('MPG')
plt.title('HP vs. MPG (Testing Set)')
plt.show()